package com.spider.service;

import java.util.List;

import javax.management.JMException;

import com.spider.dao.HBaseDAOImp;
import com.spider.dao.HBaseDao;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.monitor.SpiderMonitor;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;

/**
 * WebMagic {@link PageProcessor} that, for every downloaded page:
 * <ul>
 *   <li>stores the {@code title} attributes of anchors whose class is exactly
 *       {@code "f-toe classa  z-sel"} in the result field {@code "Know1"};</li>
 *   <li>stores the {@code title} attributes of anchors whose class is exactly
 *       {@code "f-toe classa"} in the result field {@code "Know2"};</li>
 *   <li>queues every link found inside the {@code filter-book} div whose URL
 *       contains {@code bid=} as a follow-up request.</li>
 * </ul>
 */
public class QuesPageProcessKnow implements PageProcessor {

	/** Result field holding the titles of anchors matched by {@link #SELECTED_ANCHOR_TITLES}. */
	private static final String FIELD_SELECTED = "Know1";

	/** Result field holding the titles of anchors matched by {@link #PLAIN_ANCHOR_TITLES}. */
	private static final String FIELD_PLAIN = "Know2";

	/** The class value is compared verbatim, so the double space before {@code z-sel} is significant. */
	private static final String SELECTED_ANCHOR_TITLES = "//a[@class='f-toe classa  z-sel']/@title";

	private static final String PLAIN_ANCHOR_TITLES = "//a[@class='f-toe classa']/@title";

	/**
	 * Container whose links are followed. The id is quoted so that it is a string
	 * literal: in standard XPath an unquoted {@code filter-book} is a child-element
	 * name test, not the text "filter-book".
	 */
	private static final String BOOK_FILTER_CONTAINER = "//div[@id='filter-book']";

	/** Only links whose URL contains a {@code bid=} query parameter are followed. */
	private static final String BOOK_LINK_PATTERN = ".*bid=.*";

	/**
	 * Crawl settings: 3 retries, a sleep time of 1000 and a timeout of 10000
	 * (milliseconds, per the WebMagic {@code Site} API).
	 */
	private final Site site = Site.me().setRetryTimes(3).setSleepTime(1000).setTimeOut(10000);

	/*
	 * NOTE(review): not referenced anywhere else in this class. Kept because
	 * constructing HBaseDAOImp may have side effects that are not visible from
	 * this file - confirm before removing.
	 */
	private final HBaseDao dao = new HBaseDAOImp();

	/**
	 * Returns the crawl settings used by this processor.
	 *
	 * @return the {@link Site} configuration built when this processor was created
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Extracts the anchor titles into the {@code "Know1"} / {@code "Know2"} result
	 * fields and queues the matching links of the {@code filter-book} div.
	 *
	 * @param page the downloaded page handed in by the spider
	 */
	@Override
	public void process(Page page) {
		page.putField(FIELD_SELECTED, page.getHtml().xpath(SELECTED_ANCHOR_TITLES).all());
		page.putField(FIELD_PLAIN, page.getHtml().xpath(PLAIN_ANCHOR_TITLES).all());

		List<String> bookUrls = page.getHtml()
				.xpath(BOOK_FILTER_CONTAINER)
				.links()
				.regex(BOOK_LINK_PATTERN)
				.all();

		page.addTargetRequests(bookUrls);
	}

	/**
	 * Starts a five-thread crawl from the hard-coded seed URL and blocks until
	 * {@link Spider#run()} returns.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		Spider spider = Spider.create(new QuesPageProcessKnow())
				.addUrl("http://k12.tiku.com/testPaper.html?hdSearch=&key=&sct=0&cn=%E7%89%A9%E7%90%86&st=2&cid=500011&bid=800074&vid=800013&sort=0")
				.thread(5);

		try {
			SpiderMonitor.instance().register(spider);
		} catch (JMException e) {
			// Best effort: a failed monitor registration is reported, and the
			// crawl below is still started.
			e.printStackTrace();
		}

		spider.run();
	}
}
